# topic 15
#
# Worked example: two-tailed hypothesis test for a population mean
# when the population standard deviation is known.  The script
# walks through the critical value approach, the attained
# significance (p-value) approach, the course helper function
# hypoth_test_known(), and finally a simulation of Type I errors.
#
# First, set up the situation.  We have a population
# with a known standard deviation.
#
source("../gnrnd5.R")
source("../gnrnd4.R")
# gnrnd5() is defined in gnrnd5.R (not shown here); this script
# relies on it leaving the generated values in the global L1.
gnrnd5(169453199902, 873001383)
big_pop <- L1

# I can tell you that I know the standard deviation
# of big_pop is 17.12479

# Now, someone says that they believe that the
# mean of big_pop is 150.  Thus our null hypothesis
# is that the true mean is 150.  We want to see if that
# could be correct.  We will get a sample of
# size 35 and we will look at the sample mean.
# We are willing to be wrong in telling the
# person that they are wrong 2.5% of the time!

########################
## The critical value approach.  We know that
## samples of size 35 will have a standard error
## of the mean be 17.12479/sqrt(35) and that those
## values will be normally distributed with the
## same mean as the population, a value we assume
## to be 150.  This is a two-tailed test because
## a sample mean that is either too low or too high
## would indicate that 150 is not the mean.
## Therefore, find the low value that has 0.025/2 as
## P(X < value) and the high value that has 0.025/2 as
## P(X > value).

# long way
low_z <- qnorm(0.025 / 2)
low_z
low_val <- 150 + low_z * 17.12479 / sqrt(35) # + because z is < 0
low_val
high_z <- qnorm(0.025 / 2, lower.tail = FALSE)
high_z # this was silly because we know it is -low_z
high_val <- 150 + high_z * 17.12479 / sqrt(35)
high_val

####### pause and look at a shorter way to get
####### those two values
# NOTE: sd must be the same 17.12479/sqrt(35) used in the long
# way above so that both approaches give identical critical values.
low_val <- qnorm(0.025 / 2, mean = 150, sd = 17.12479 / sqrt(35))
low_val
high_val <- qnorm(0.025 / 2,
  mean = 150, sd = 17.12479 / sqrt(35),
  lower.tail = FALSE
)
high_val

##########
##########
# Get our sample
# the first time we do this let us get the
# same sample each time
gnrnd4(768733401, 200000001)
L1
# take those as the index values of our random sample
our_samp <- big_pop[L1]
our_samp
# find the mean of our sample
mean(our_samp)
## so now compare that mean to our critical values.
## In this case the sample mean is greater than
## our critical high.  Therefore reject the
## null hypothesis that the true mean is 150.

#############
############# the attained significance approach
## how strange would it be to get a mean of
## 160.3743 for a sample of size 35 if the true
## mean is 150?
#
pnorm(160.3743,
  mean = 150, sd = 17.12479 / sqrt(35),
  lower.tail = FALSE
)
#
## but we would need to double that to account for
## values that extreme or more extreme on the low
## side
0.0001691883 * 2
# is that probability less than our 2.5% ?
# Yes, therefore, reject the null hypothesis
# in favor of the alternative.

####################### Now use the function to
####################### do the same thing
source("../hypo_known.R")
# hypoth_test_known() comes from hypo_known.R (not shown).  The
# arguments appear to be: null-hypothesis mean, known sigma, test
# type (0 presumably means two-tailed -- confirm in hypo_known.R),
# significance level, sample size, sample mean.
hypoth_test_known(
  150, 17.12479, 0, 0.025,
  35, mean(our_samp)
)

#######################################
#######################################
# Now we want to repeat this process
# but each time we want a different sample
# of size 35
L1 <- sample(big_pop, 35)
L1
hypoth_test_known(
  150, 17.12479, 0, 0.025,
  35, mean(L1)
)
#### re-run the sample() and hypoth_test_known() statements
#### just above again and again

### now, since we have the population let us peek
#   at the true mean
mean(big_pop)

####### Try our samples again, but this time test
##      the null hypothesis that the true mean
##      is 160.2938, and do the test at the 0.05
##      level of significance.
L1 <- sample(big_pop, 35)
x_bar <- mean(L1)
hypoth_test_known(
  160.2938, 17.12479, 0, 0.05,
  35, x_bar
)
#### re-run the three statements just above again and again,
#### and we should see a Type I error about
#### 5% of the time.

### we can actually do this 1000 times and see how
### many times we reject the null hypothesis even
### though it is true.
L2 <- 1:1000 # one slot per simulated test; overwritten below
for (i in seq_along(L2)) {
  L1 <- sample(big_pop, 35)
  x_bar <- mean(L1)
  s_x <- sd(L1) # sample sd; not needed by the known-sigma test
  answer <- hypoth_test_known(
    160.2938, 17.12479, 0, 0.05,
    35, x_bar
  )
  # element 13 is presumably the reject / do-not-reject decision;
  # confirm against hypo_known.R
  L2[i] <- answer[13]
}
table(L2)